Priming response (enhanced survival upon secondary infection) has been demonstrated with different routes (septic and oral) in the red flour beetle Tribolium castaneum and shows to have high degree of specificity. Some studies have pointed out the importance of the hosts natural microbiota, suggesting that priming might be partially explained by the presence of commensal bacteria in the gut. Using a well established model system for studying host-parasite interaction and insect immunity, red flour beetle Tribolium castaneum and enthomopathogenic bacterium Bacillus thurigiensis (Bt), we have studied if priming could influence microbiome composition of the beetle larvae. We conducted an experiment using the two established routes of priming in this system: injection with heat-killed Bt and oral via ingestion of filtered sterilized bacterial spore supernatants by beetle larvae, with diverse strains of Bt varying in their ability to induce priming. Microbiota composition was assessed after the priming treatment by deep sequencing of the v1-v2 region of the bacterial 16S rRNA gene.
For sequencing, variable regions V1 and V2 of the 16S rRNA gene within the DNA samples were amplified using the primer pair 27F-338R in a dual-barcoding approach as per Caporaso et al. 2012. 3.5 µl of cDNA was used for amplification and PCR products were verified using the electrophoresis in agarose gel. PCR products were normalized using the SequalPrep Normalization Plate Kit, pooled equimolarly, and sequenced on the Illumina MiSeq v3 2x300bp. Demultiplexing after sequencing was based on 0 mismatches in the barcode sequences.
library(rmarkdown)
library(dada2)
library(ggplot2)
library(phyloseq)
library(DECIPHER)
library(phangorn)
library(ape)
library(decontam)
setwd("/home/shrey/ana_metagenome/Upload/")
# Loading the Forward and reverse fastq files for all the samples:
path <- "ngs_data"
fnFs <- sort(list.files(path, pattern="_R1.fastq.gz", full.names = TRUE))
fnRs <- sort(list.files(path, pattern="_R2.fastq.gz", full.names = TRUE))
# Extract sample names:
sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
# Plotting the quality profiles of the reads of first 10 samples:
temp <- sample.names[1:8]
names(temp) <- basename(fnFs)[1:8]
plotQualityProfile(fnFs[1:8]) + ggtitle("Forward") + facet_wrap(~file, ncol = 4, labeller=as_labeller(temp))
names(temp) <- basename(fnRs)[1:8]
plotQualityProfile(fnRs[1:8]) + ggtitle("Reverse") + facet_wrap(~file, ncol = 4, labeller=as_labeller(temp))
# Trimming the fastq files:
filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
names(filtFs) <- sample.names
names(filtRs) <- sample.names
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, trimLeft = c(5,5) , truncLen=c(240,220),
maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE,
compress=TRUE, multithread=TRUE)
## Checking quality after trimming the reads:
temp <- sample.names[1:8]
names(temp) <- basename(filtFs)[1:8]
plotQualityProfile(filtFs[1:8]) + ggtitle("Forward") + facet_wrap(~file, ncol = 4, labeller=as_labeller(temp))
names(temp) <- basename(filtRs)[1:8]
plotQualityProfile(filtRs[1:8]) + ggtitle("Reverse") + facet_wrap(~file, ncol = 4, labeller=as_labeller(temp))
# De-replication:
derepFs <- derepFastq(filtFs)
derepRs <- derepFastq(filtRs)
# Name the derep-class objects by the sample names
names(derepFs) <- sample.names
names(derepRs) <- sample.names
#Learning the error rates:
errF <- learnErrors(filtFs, multithread=TRUE, verbose = F)
## 101048355 total bases in 429993 reads from 9 samples will be used for learning the error rates.
errR <- learnErrors(filtRs, multithread=TRUE, verbose = F)
## 111423535 total bases in 518249 reads from 11 samples will be used for learning the error rates.
plotErrors(errF, nominalQ=TRUE)
plotErrors(errR, nominalQ=TRUE)
#Creating dada objects:
dadaFs <- dada(derepFs, err=errF, multithread=TRUE, verbose = F)
dadaRs <- dada(derepRs, err=errR, multithread=TRUE, verbose = F)
# Merge paired reads
mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose = F)
# Constructing ASV sequence table
seqtab <- makeSequenceTable(mergers)
# Inspect distribution of sequence lengths:
table(nchar(getSequences(seqtab)))
##
## 235 238 239 244 246 249 251 252 254 258 261 262 263 264 265 266 267 268 269 270 271
## 1 2 1 1 2 2 1 1 2 1 4 1 1 9 18 352 79 1068 278 340 143
## 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
## 120 15 49 3 31 15 56 39 33 61 106 128 99 179 91 243 120 245 157 195 672
## 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
## 394 2003 721 775 402 571 304 845 183 190 416 494 206 340 359 421 220 266 65 47 128
## 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
## 108 41 60 53 126 76 22 11 52 11 7 1 3 6 8 10 5 22 83 301 45
## 335 336 337 340 342 352 358 360 371 391
## 1 1 8 2 3 2 1 7 1 1
# Remove chimeras
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=F)
# Chimeras account for 4.1% of the merged sequence reads:
(1- sum(seqtab.nochim)/sum(seqtab)) *100
## [1] 4.175729
#Tracking reads through the pipeline:
getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
paged_table(as.data.frame(track))
rm(getN,out,filtFs,filtRs,dadaFs,dadaRs,fnFs,fnRs,derepFs,derepRs,errF,errR,mergers,seqtab,temp)
# Assigning taxonomy using Silva database
taxa <- assignTaxonomy(seqtab.nochim, "database/silva 138/silva_nr99_v138_train_set.fa.gz",
multithread=TRUE, verbose = FALSE)
taxa <- addSpecies(taxa, "database/silva 138/silva_species_assignment_v138.fa.gz")
# making a fasta of ASV seqs:
asv_seqs <- colnames(seqtab.nochim)
asv_headers <- vector(dim(seqtab.nochim)[2], mode="character")
asv_fasta <- asv_seqs
for (i in 1:dim(seqtab.nochim)[2]) {
asv_headers[i] <- paste(">ASV", i, sep="_")
}
asv_fasta <- c(rbind(asv_headers, asv_seqs))
# Making ASV count table:
asv_tab <- t(seqtab.nochim)
row.names(asv_tab) <- sub(">", "", asv_headers)
asv_tab<-as.matrix(asv_tab)
#Making ASV taxanomy table:
asv_tax <- taxa
row.names(asv_tax) <- sub(">", "", asv_headers)
paged_table(as.data.frame(asv_tax))
# DADA2 complete.
# Reading the table with samples and treatments
sample_table <- as.data.frame(read.csv("Sample_table.csv",sep=";" ))
sample_table <- sample_table[1:90,c(1,4:11)]
rownames(sample_table) <- sample_table$Samples
paged_table(sample_table)
count_phy <- otu_table(asv_tab, taxa_are_rows=T)
sample_table_phy <- sample_data(sample_table)
tax_tab_phy<-tax_table(asv_tax)
# Creating Phylogenetic Tree of ASVs:
# Aligning the sequences:
seqs <-(grep(pattern = ">ASV", asv_fasta,invert = T,value = T))
names(seqs) <- seqs
alignment <- AlignSeqs(DNAStringSet(seqs), anchor=NA, verbose = F)
phang.align <- phyDat(as(alignment, "matrix"), type="DNA")
write.phyDat(phang.align, file="alignment.fasta", format="fasta")
# Constructing the tree using FastTree:
system("./FastTreeMP -gtr -nt alignment.fasta > gene-tree_all.tre", ignore.stderr = T)
# Importing the tree
fitGTR <- read.tree(file = "gene-tree_all.tre")
tree = phy_tree(fitGTR)
taxa_names(tree) <- sub(">", "", asv_headers)
# Creating a phyloseq object:
ps<- phyloseq(count_phy,sample_table_phy,tax_tab_phy,tree)
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10933 taxa and 90 samples ]
## sample_data() Sample Data: [ 90 samples by 9 sample variables ]
## tax_table() Taxonomy Table: [ 10933 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 10933 tips and 10931 internal nodes ]
# Plotting the library size:
df <- as.data.frame(sample_data(ps))
df$LibrarySize <- sample_sums(ps)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=Sample)) + geom_point()
sample_data(ps)$is.neg <- sample_data(ps)$Sample == "control"
contamdf.prev <- isContaminant(ps, method="prevalence", neg="is.neg")
table(contamdf.prev$contaminant)
##
## FALSE TRUE
## 10858 75
# threshold=0.5, will identify as contaminants all those ASVs which are more prevalent in negative controls than in the samples.
contamdf.prev05 <- isContaminant(ps, method="prevalence", neg="is.neg", threshold=0.5)
table(contamdf.prev05$contaminant)
##
## FALSE TRUE
## 10655 278
ps.pa <- transform_sample_counts(ps, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Sample == "control", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Sample == "Sample", ps.pa)
# Dataframe of prevalence in positive and negative samples:
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
contaminant=contamdf.prev05$contaminant)
# Plot showing the number of these taxa observed in negative controls and positive samples:
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
xlab("Number of Negative Controls") + ylab("Number of True Samples") + theme_bw() +
labs(color='Contaminants') +
theme(panel.grid.major = element_blank(), legend.position = c(0.9,0.1),
legend.background = element_rect(fill=alpha('white', 0))) +
scale_x_continuous(breaks = seq(0, 8, by = 1)) +
scale_y_continuous(breaks = seq(0, 80, by = 10))+
scale_fill_viridis_d()
# Removing the contaminations
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10933 taxa and 90 samples ]
## sample_data() Sample Data: [ 90 samples by 10 sample variables ]
## tax_table() Taxonomy Table: [ 10933 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 10933 tips and 10931 internal nodes ]
ps.noncontam <- prune_taxa(!contamdf.prev05$contaminant, ps)
# Removing the mock community
ps.noncontam <- prune_samples(sample_names(ps.noncontam) != "I21726-L1", ps.noncontam)
ps.noncontam <- prune_samples(sample_names(ps.noncontam) != "I21743-L1", ps.noncontam)
# Removing the controls out
ps.noncontam <- prune_samples(sample_data(ps.noncontam)$Sample == "Sample", ps.noncontam)
ps.noncontam
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10655 taxa and 79 samples ]
## sample_data() Sample Data: [ 79 samples by 10 sample variables ]
## tax_table() Taxonomy Table: [ 10655 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 10655 tips and 10653 internal nodes ]
# Number of features for each phyla
table(tax_table(ps.noncontam)[, "Phylum"], exclude = NULL)
##
## Abditibacteriota Acidobacteriota Actinobacteriota
## 69 295 3407
## Armatimonadota Bacteroidota Bdellovibrionota
## 58 1174 66
## Campilobacterota Chloroflexi Cyanobacteria
## 11 184 393
## Deferribacterota Deinococcota Dependentiae
## 2 128 2
## Desulfobacterota Elusimicrobiota Entotheonellaeota
## 11 2 1
## Fibrobacterota Firmicutes Fusobacteriota
## 5 1170 55
## Gemmatimonadota Hydrogenedentes Latescibacterota
## 57 1 2
## Methylomirabilota Myxococcota Nitrospirota
## 7 183 10
## Patescibacteria Planctomycetota Proteobacteria
## 24 73 3156
## RCP2-54 SAR324 clade(Marine group B) Spirochaetota
## 2 4 4
## Sumerlaeota Synergistota Verrucomicrobiota
## 2 4 76
## WPS-2 <NA>
## 8 9
ps.noncontam <- subset_taxa(ps.noncontam, !is.na(Phylum) & !Phylum %in% c("", "uncharacterized"))
# Compute prevalence of each feature:
prevdf = apply(X = otu_table(ps),MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
prevdf = apply(X = otu_table(ps.noncontam),
MARGIN = ifelse(taxa_are_rows(ps.noncontam), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
prevdf = data.frame(Prevalence = prevdf,TotalAbundance = taxa_sums(ps.noncontam),tax_table(ps.noncontam))
# Mean and total prevelance of each feature:
phyla_prevelance <- plyr::ddply(prevdf, "Phylum",
function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})
colnames(phyla_prevelance) <- c("Phylum", "Mean Prevelance", "Total Prevelance(Sum of Prevelance)")
# Phyla Prevelence:
paged_table(phyla_prevelance)
# Plotting abundance before filtering
ggplot(prevdf, aes(TotalAbundance, Prevalence / nsamples(ps.noncontam),color=Phylum)) +
geom_hline(yintercept = 0.05, alpha = 0.5, linetype = 2) + geom_point(size = 0.5, alpha = 0.7) +
scale_x_log10() + xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
facet_wrap(~Phylum) + theme(legend.position="none",
strip.background = element_rect(colour="black",fill="white"),
strip.text.x = element_text(margin = margin(0.05,0,0.05,0, "cm")),
text = element_text(size = 6),
axis.text = element_text(size = 4.5),
panel.background = element_rect(fill = "transparent"),
panel.grid.minor = element_line(color = "lightgray"),
panel.border = element_rect(fill = "transparent",
color = "black"))+
scale_fill_viridis_d()
# Defining phylas to filter:
filterPhyla = c("BRC1", "Deferribacteres","Elusimicrobia","Entotheonellaeota","Hydrogenedentes",
"Latescibacteria","Dependentiae","Cyanobacteria")
# Filter entries with unidentified Phylum.
ps1 = subset_taxa(ps.noncontam, !Phylum %in% filterPhyla)
# Exporting files for MicrobiomeAnalyst analysis:
write.tree(phy = phy_tree(ps1), file = "microbiomeanalyst/genetree_filtered_all.tre", tree.names = TRUE)
write.table(tax_table(ps1),file = "microbiomeanalyst/ASVs_taxonomy_filtered.csv", sep = "\t")
write.table(otu_table(ps1),file = "microbiomeanalyst/ASVs_counts_filtered.csv", sep = "\t")
write.table(sample_data(ps1),file = "microbiomeanalyst/Data_table_filtered.csv", sep = "\t")
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.7 LTS
##
## Matrix products: default
## BLAS: /opt/R/3.5.0/lib/R/lib/libRblas.so
## LAPACK: /opt/R/3.5.0/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=de_DE.UTF-8
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] decontam_1.2.1 phangorn_2.5.5 ape_5.5 DECIPHER_2.10.2 RSQLite_2.2.1
## [6] Biostrings_2.50.2 XVector_0.22.0 IRanges_2.16.0 S4Vectors_0.20.1 BiocGenerics_0.28.0
## [11] phyloseq_1.26.1 ggplot2_3.3.5 dada2_1.10.1 Rcpp_1.0.7 rmarkdown_2.11
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-153 bitops_1.0-7 matrixStats_0.61.0
## [4] bit64_4.0.5 RColorBrewer_1.1-2 GenomeInfoDb_1.18.2
## [7] bslib_0.3.0 tools_3.5.0 utf8_1.2.2
## [10] R6_2.5.1 vegan_2.5-7 DBI_1.1.1
## [13] mgcv_1.8-33 colorspace_2.0-2 permute_0.9-5
## [16] ade4_1.7-18 withr_2.4.2 tidyselect_1.1.1
## [19] bit_4.0.4 compiler_3.5.0 Biobase_2.42.0
## [22] DelayedArray_0.8.0 labeling_0.4.2 sass_0.4.0
## [25] scales_1.1.1 quadprog_1.5-8 stringr_1.4.0
## [28] digest_0.6.27 Rsamtools_1.34.1 pkgconfig_2.0.3
## [31] htmltools_0.5.2 highr_0.9 fastmap_1.1.0
## [34] rlang_0.4.11 farver_2.1.0 jquerylib_0.1.4
## [37] generics_0.1.0 hwriter_1.3.2 jsonlite_1.7.2
## [40] BiocParallel_1.16.6 dplyr_1.0.7 RCurl_1.98-1.5
## [43] magrittr_2.0.1 GenomeInfoDbData_1.2.0 biomformat_1.10.1
## [46] Matrix_1.2-9 munsell_0.5.0 Rhdf5lib_1.4.3
## [49] fansi_0.5.0 lifecycle_1.0.0 stringi_1.7.4
## [52] yaml_2.2.1 MASS_7.3-54 SummarizedExperiment_1.12.0
## [55] zlibbioc_1.28.0 rhdf5_2.26.2 plyr_1.8.6
## [58] grid_3.5.0 blob_1.2.2 crayon_1.4.1
## [61] lattice_0.20-45 splines_3.5.0 multtest_2.38.0
## [64] knitr_1.34 pillar_1.6.2 igraph_1.2.6
## [67] GenomicRanges_1.34.0 reshape2_1.4.4 codetools_0.2-18
## [70] fastmatch_1.1-3 glue_1.4.2 evaluate_0.14
## [73] ShortRead_1.40.0 latticeExtra_0.6-28 data.table_1.14.0
## [76] RcppParallel_5.1.4 vctrs_0.3.8 foreach_1.5.1
## [79] gtable_0.3.0 purrr_0.3.4 assertthat_0.2.1
## [82] cachem_1.0.6 xfun_0.26 survival_3.2-13
## [85] tibble_3.1.4 iterators_1.0.13 GenomicAlignments_1.18.1
## [88] memoise_2.0.0 cluster_2.1.2 ellipsis_0.3.2